This notebook is a complementary code of the master thesis project for the master of Mathematics at the VU

In this notebook one can find a step by step process on finding anomalies of SIF over the Amazonia from the TROPOSIF dataset both with CAE and with the z-score method. At the end we also perform a comparison with the droughts recorded by the Monitor de Secas (MS).

Make sure to follow one cell at a time to understand the process, you will find the autoencoder architecture and the training in some cells bellow. You can avoid having to run this long process by skipping some cells (will be explained in detail further on the notebook) and use a file with the already train autoencoder. This will not only save time, but also ensure that the results you obtain are exactly the results that I used to write my master thesis.

If any questions were to arrise, do not hesitate to contact me: carmen-oliver.huidobro@uv.es

In [1]:
import xarray as xr
import numpy as np
import cartopy.crs as ccrs
import cartopy.feature as cfeature
import os
import matplotlib.pyplot as plt
import pandas as pd
import re
from skimage.transform import resize
from keras import regularizers
from keras.layers import Input, Conv3D, MaxPooling3D, UpSampling3D, Cropping3D
from keras.models import Model
import numpy as np
from keras.models import Model
from matplotlib.patches import Patch
import matplotlib.image as mpimg
import cv2
import numpy as np
import glob
import matplotlib.image as mpimg
from keras.models import Model
from keras.models import load_model
import cartopy.crs as ccrs
import cartopy.feature as cfeature
from matplotlib.patches import Patch
import time
from codecarbon import EmissionsTracker
/Users/carmen/Desktop/SIF_anomalies/venv/lib/python3.9/site-packages/urllib3/__init__.py:35: NotOpenSSLWarning: urllib3 v2 only supports OpenSSL 1.1.1+, currently the 'ssl' module is compiled with 'LibreSSL 2.8.3'. See: https://github.com/urllib3/urllib3/issues/3020
  warnings.warn(

We are using a rectangle to bound the Amazonia area with latmin, latmax, lonmin and lonmax. These can be modified if one wishes to find SIF anomalies in other areas of the world.

In [2]:
lat_min, lat_max = -15, 5
lon_min, lon_max = -75, -50

data_dir = "/Users/Carmen/Desktop/SIF_anomalies/SIF_DATA_TROPOMI/" #change according to your path

sif_amazon = []
time_list = []

files = sorted([f for f in os.listdir(data_dir) if f.endswith(".nc")])
print("Files found:", files)
for file in files:
    match = re.search(r"month-(\d{6})", file)
    if match:
        date_str = match.group(1)  
        year = int(date_str[:4])  
        month = int(date_str[4:6])  

        file_path = os.path.join(data_dir, file)
        
        try:
            ds = xr.open_dataset(file_path)
        except Exception as e:
            print(f"Error opening {file_path}: {e}")
            continue
        ds_amazon = ds.sel(latitude=slice(lat_min, lat_max), longitude=slice(lon_min, lon_max))
        
        sif_amazon.append(ds_amazon["solar_induced_fluorescence"].values)  
        time_list.append(f"{year}-{month:02d}")
        # Ensure data was loaded
if not sif_amazon or not time_list:
    raise ValueError("No valid data found. Check the input files and filtering logic.")
sif_amazon = np.stack(sif_amazon, axis=0).squeeze()  # Shape: [N_months, lat, lon]

years = sorted(set(int(t.split("-")[0]) for t in time_list))
num_years = len(years)

sif_monthly = np.full((num_years, 12, *sif_amazon.shape[1:]), np.nan)
for i, month_data in enumerate(sif_amazon):
    year_idx = i // 12  
    month_idx = i % 12  
    sif_monthly[year_idx, month_idx] = month_data 


print("Fixed sif_monthly shape:", sif_monthly.shape)
Found files: ['s5p-l3grd-sif-001-month-20191201-20240325.nc', 's5p-l3grd-sif-001-month-20190501-20240325.nc', 's5p-l3grd-sif-001-month-20230201-20240325.nc', 's5p-l3grd-sif-001-month-20190101-20240325.nc', 's5p-l3grd-sif-001-month-20231101-20240325.nc', 's5p-l3grd-sif-001-month-20230601-20240325.nc', 's5p-l3grd-sif-001-month-20220801-20240325.nc', 's5p-l3grd-sif-001-month-20210401-20240325.nc', 's5p-l3grd-sif-001-month-20210301-20240325.nc', 's5p-l3grd-sif-001-month-20210701-20240325.nc', 's5p-l3grd-sif-001-month-20211001-20240325.nc', 's5p-l3grd-sif-001-month-20190601-20240325.nc', 's5p-l3grd-sif-001-month-20191101-20240325.nc', 's5p-l3grd-sif-001-month-20230101-20240325.nc', 's5p-l3grd-sif-001-month-20240701-20240809.nc', 's5p-l3grd-sif-001-month-20190201-20240325.nc', 's5p-l3grd-sif-001-month-20230501-20240325.nc', 's5p-l3grd-sif-001-month-20200901-20240325.nc', 's5p-l3grd-sif-001-month-20231201-20240325.nc', 's5p-l3grd-sif-001-month-20190801-20240325.nc', 's5p-l3grd-sif-001-month-20200301-20240325.nc', 's5p-l3grd-sif-001-month-20200701-20240325.nc', 's5p-l3grd-sif-001-month-20201001-20240325.nc', 's5p-l3grd-sif-001-month-20220101-20240325.nc', 's5p-l3grd-sif-001-month-20210901-20240325.nc', 's5p-l3grd-sif-001-month-20220501-20240325.nc', 's5p-l3grd-sif-001-month-20221201-20240325.nc', 's5p-l3grd-sif-001-month-20220201-20240325.nc', 's5p-l3grd-sif-001-month-20240301-20240403.nc', 's5p-l3grd-sif-001-month-20221101-20240325.nc', 's5p-l3grd-sif-001-month-20220601-20240325.nc', 's5p-l3grd-sif-001-month-20200401-20240325.nc', 's5p-l3grd-sif-001-month-20230801-20240325.nc', 's5p-l3grd-sif-001-month-20220701-20240325.nc', 's5p-l3grd-sif-001-month-20241101-20241203.nc', 's5p-l3grd-sif-001-month-20221001-20240325.nc', 's5p-l3grd-sif-001-month-20241001-20241107.nc', 's5p-l3grd-sif-001-month-20220301-20240325.nc', 's5p-l3grd-sif-001-month-20230901-20240325.nc', 's5p-l3grd-sif-001-month-20200501-20240325.nc', 's5p-l3grd-sif-001-month-20201201-20240325.nc', 's5p-l3grd-sif-001-month-20240401-20240504.nc', 's5p-l3grd-sif-001-month-20200101-20240325.nc', 's5p-l3grd-sif-001-month-20240601-20240827.nc', 's5p-l3grd-sif-001-month-20201101-20240325.nc', 's5p-l3grd-sif-001-month-20200601-20240325.nc', 's5p-l3grd-sif-001-month-20200201-20240325.nc', 's5p-l3grd-sif-001-month-20190901-20240325.nc', 's5p-l3grd-sif-001-month-20220401-20240325.nc', 's5p-l3grd-sif-001-month-20210801-20240325.nc', 's5p-l3grd-sif-001-month-20211101-20240325.nc', 's5p-l3grd-sif-001-month-20210601-20240325.nc', 's5p-l3grd-sif-001-month-20240501-20240828.nc', 's5p-l3grd-sif-001-month-20210201-20240325.nc', 's5p-l3grd-sif-001-month-20200801-20240325.nc', 's5p-l3grd-sif-001-month-20230401-20240325.nc', 's5p-l3grd-sif-001-month-20190301-20240325.nc', 's5p-l3grd-sif-001-month-20240101-20240313.nc', 's5p-l3grd-sif-001-month-20240801-20240903.nc', 's5p-l3grd-sif-001-month-20191001-20240325.nc', 's5p-l3grd-sif-001-month-20190701-20240325.nc', 's5p-l3grd-sif-001-month-20230701-20240325.nc', 's5p-l3grd-sif-001-month-20231001-20240325.nc', 's5p-l3grd-sif-001-month-20240201-20240313.nc', 's5p-l3grd-sif-001-month-20230301-20240325.nc', 's5p-l3grd-sif-001-month-20190401-20240325.nc', 's5p-l3grd-sif-001-month-20210501-20240325.nc', 's5p-l3grd-sif-001-month-20220901-20240325.nc', 's5p-l3grd-sif-001-month-20240901-20241006.nc', 's5p-l3grd-sif-001-month-20211201-20240325.nc', 's5p-l3grd-sif-001-month-20210101-20240325.nc']
Files found: ['s5p-l3grd-sif-001-month-20190101-20240325.nc', 's5p-l3grd-sif-001-month-20190201-20240325.nc', 's5p-l3grd-sif-001-month-20190301-20240325.nc', 's5p-l3grd-sif-001-month-20190401-20240325.nc', 's5p-l3grd-sif-001-month-20190501-20240325.nc', 's5p-l3grd-sif-001-month-20190601-20240325.nc', 's5p-l3grd-sif-001-month-20190701-20240325.nc', 's5p-l3grd-sif-001-month-20190801-20240325.nc', 's5p-l3grd-sif-001-month-20190901-20240325.nc', 's5p-l3grd-sif-001-month-20191001-20240325.nc', 's5p-l3grd-sif-001-month-20191101-20240325.nc', 's5p-l3grd-sif-001-month-20191201-20240325.nc', 's5p-l3grd-sif-001-month-20200101-20240325.nc', 's5p-l3grd-sif-001-month-20200201-20240325.nc', 's5p-l3grd-sif-001-month-20200301-20240325.nc', 's5p-l3grd-sif-001-month-20200401-20240325.nc', 's5p-l3grd-sif-001-month-20200501-20240325.nc', 's5p-l3grd-sif-001-month-20200601-20240325.nc', 's5p-l3grd-sif-001-month-20200701-20240325.nc', 's5p-l3grd-sif-001-month-20200801-20240325.nc', 's5p-l3grd-sif-001-month-20200901-20240325.nc', 's5p-l3grd-sif-001-month-20201001-20240325.nc', 's5p-l3grd-sif-001-month-20201101-20240325.nc', 's5p-l3grd-sif-001-month-20201201-20240325.nc', 's5p-l3grd-sif-001-month-20210101-20240325.nc', 's5p-l3grd-sif-001-month-20210201-20240325.nc', 's5p-l3grd-sif-001-month-20210301-20240325.nc', 's5p-l3grd-sif-001-month-20210401-20240325.nc', 's5p-l3grd-sif-001-month-20210501-20240325.nc', 's5p-l3grd-sif-001-month-20210601-20240325.nc', 's5p-l3grd-sif-001-month-20210701-20240325.nc', 's5p-l3grd-sif-001-month-20210801-20240325.nc', 's5p-l3grd-sif-001-month-20210901-20240325.nc', 's5p-l3grd-sif-001-month-20211001-20240325.nc', 's5p-l3grd-sif-001-month-20211101-20240325.nc', 's5p-l3grd-sif-001-month-20211201-20240325.nc', 's5p-l3grd-sif-001-month-20220101-20240325.nc', 's5p-l3grd-sif-001-month-20220201-20240325.nc', 's5p-l3grd-sif-001-month-20220301-20240325.nc', 's5p-l3grd-sif-001-month-20220401-20240325.nc', 's5p-l3grd-sif-001-month-20220501-20240325.nc', 's5p-l3grd-sif-001-month-20220601-20240325.nc', 's5p-l3grd-sif-001-month-20220701-20240325.nc', 's5p-l3grd-sif-001-month-20220801-20240325.nc', 's5p-l3grd-sif-001-month-20220901-20240325.nc', 's5p-l3grd-sif-001-month-20221001-20240325.nc', 's5p-l3grd-sif-001-month-20221101-20240325.nc', 's5p-l3grd-sif-001-month-20221201-20240325.nc', 's5p-l3grd-sif-001-month-20230101-20240325.nc', 's5p-l3grd-sif-001-month-20230201-20240325.nc', 's5p-l3grd-sif-001-month-20230301-20240325.nc', 's5p-l3grd-sif-001-month-20230401-20240325.nc', 's5p-l3grd-sif-001-month-20230501-20240325.nc', 's5p-l3grd-sif-001-month-20230601-20240325.nc', 's5p-l3grd-sif-001-month-20230701-20240325.nc', 's5p-l3grd-sif-001-month-20230801-20240325.nc', 's5p-l3grd-sif-001-month-20230901-20240325.nc', 's5p-l3grd-sif-001-month-20231001-20240325.nc', 's5p-l3grd-sif-001-month-20231101-20240325.nc', 's5p-l3grd-sif-001-month-20231201-20240325.nc', 's5p-l3grd-sif-001-month-20240101-20240313.nc', 's5p-l3grd-sif-001-month-20240201-20240313.nc', 's5p-l3grd-sif-001-month-20240301-20240403.nc', 's5p-l3grd-sif-001-month-20240401-20240504.nc', 's5p-l3grd-sif-001-month-20240501-20240828.nc', 's5p-l3grd-sif-001-month-20240601-20240827.nc', 's5p-l3grd-sif-001-month-20240701-20240809.nc', 's5p-l3grd-sif-001-month-20240801-20240903.nc', 's5p-l3grd-sif-001-month-20240901-20241006.nc', 's5p-l3grd-sif-001-month-20241001-20241107.nc', 's5p-l3grd-sif-001-month-20241101-20241203.nc']
Fixed sif_monthly shape: (6, 12, 911, 1137)
In [3]:
has_nans = np.isnan(sif_monthly).any()
print("Does sif_monthly contain NaNs?", has_nans)
mean_sif_monthly = np.nanmean(sif_monthly)
sif_monthly = np.nan_to_num(sif_monthly, nan=mean_sif_monthly)


sif_monthly = xr.DataArray(
    sif_monthly,
    dims=["year", "month", "latitude", "longitude"],
    coords={
        "year" : np.arange(2019, 2019 + num_years),
        "month" : np.arange(1, 13),
        "latitude" : ds_amazon["latitude"].values,
        "longitude" : ds_amazon["longitude"].values
    }
)



sif_monthly_climatology = sif_monthly.sel(year=slice(2019, 2023))

climatology = sif_monthly_climatology.mean(dim="year", skipna=True)

monthly_means = climatology.mean(dim=["latitude", "longitude"], skipna=True)

overall_mean_sif = monthly_means.mean(skipna=True)

months = ['January', 'February', 'March', 'April', 'May', 'June',
          'July', 'August', 'September', 'October', 'November', 'December']

climatology_table = pd.DataFrame({
    "Month": months,
    "Avg SIF": monthly_means.values.round(3)
})

print("Monthly Climatology Summary:")
print(climatology_table)
print("\nOverall Mean SIF:", round(overall_mean_sif.item(), 3))
Does sif_monthly contain NaNs? True
Monthly Climatology Summary:
        Month  Avg SIF
0     January    1.398
1    February    1.319
2       March    1.286
3       April    1.213
4         May    1.167
5        June    1.117
6        July    1.079
7      August    1.061
8   September    1.105
9     October    1.292
10   November    1.404
11   December    1.452

Overall Mean SIF: 1.241
In [4]:
plt.figure(figsize=(10,4))
plt.plot(months, monthly_means, color='green', label ='Climatology (2019-2023)')
sif_monthly.sel(year=2024).mean(dim=["latitude", "longitude"]).plot(label='2024')
plt.ylabel("SIF - mW m⁻² nm⁻¹ sr⁻¹")
plt.xticks(rotation=45)
plt.legend()
plt.savefig("pic/sif_climatology.png")
plt.show()
No description has been provided for this image
In [5]:
# Choose a month (0-11)
sif_july = climatology.sel(month=7)  

fig = plt.figure(figsize=(10, 6))
ax = plt.axes(projection=ccrs.PlateCarree())
ax.set_extent([lon_min, lon_max, lat_min, lat_max], crs=ccrs.PlateCarree())

ax.add_feature(cfeature.LAND, facecolor="lightgray", alpha=0.3)
ax.add_feature(cfeature.BORDERS, linewidth=0.5)
ax.add_feature(cfeature.COASTLINE, linewidth=0.5)
ax.add_feature(cfeature.RIVERS, alpha=0.5)
ax.add_feature(cfeature.LAKES, alpha=0.3)

im = sif_july.plot.pcolormesh(
    ax=ax,
    cmap="YlGn",
    add_colorbar=False,  
    add_labels=False,
    transform=ccrs.PlateCarree()
)

cbar = plt.colorbar(im, ax=ax, orientation="vertical", pad=0.03, shrink=0.8)
cbar.set_label("SIF - mW m⁻² nm⁻¹ sr⁻¹")

plt.title("Climatology Map of Amazonas - July")
plt.show()


sif_july = climatology.isel(month=6)   # 6 = July (if 0=Jan, 1=Feb,...)

fig = plt.figure(figsize=(10, 6))
ax = plt.axes(projection=ccrs.PlateCarree())
ax.set_extent([-90, -30, -35, 20], crs=ccrs.PlateCarree()) 

ax.add_feature(cfeature.LAND, facecolor="lightgray", edgecolor="black", alpha=0.3)
ax.add_feature(cfeature.BORDERS, linewidth=0.5)
ax.add_feature(cfeature.COASTLINE, linewidth=0.5)
ax.add_feature(cfeature.RIVERS, alpha=0.5)
ax.add_feature(cfeature.LAKES, alpha=0.3)

im = sif_july.plot.pcolormesh(
    ax=ax,
    cmap="YlGn",
    transform=ccrs.PlateCarree(),
    add_colorbar=False   
)

cbar = plt.colorbar(im, ax=ax, orientation="vertical", pad=0.03, shrink=0.8)
cbar.set_label("SIF - mW m⁻² nm⁻¹ sr⁻¹")

ax.set_xlabel("Longitude (°W to °E)")
ax.set_ylabel("Latitude (°S to °N)")
plt.title("Climatology Map of Amazonas - July")
plt.tight_layout()
plt.show()
/Users/carmen/Desktop/SIF_anomalies/venv/lib/python3.9/site-packages/cartopy/io/__init__.py:241: DownloadWarning: Downloading: https://naturalearth.s3.amazonaws.com/50m_physical/ne_50m_land.zip
  warnings.warn(f'Downloading: {url}', DownloadWarning)
/Users/carmen/Desktop/SIF_anomalies/venv/lib/python3.9/site-packages/cartopy/io/__init__.py:241: DownloadWarning: Downloading: https://naturalearth.s3.amazonaws.com/50m_cultural/ne_50m_admin_0_boundary_lines_land.zip
  warnings.warn(f'Downloading: {url}', DownloadWarning)
/Users/carmen/Desktop/SIF_anomalies/venv/lib/python3.9/site-packages/cartopy/io/__init__.py:241: DownloadWarning: Downloading: https://naturalearth.s3.amazonaws.com/50m_physical/ne_50m_coastline.zip
  warnings.warn(f'Downloading: {url}', DownloadWarning)
/Users/carmen/Desktop/SIF_anomalies/venv/lib/python3.9/site-packages/cartopy/io/__init__.py:241: DownloadWarning: Downloading: https://naturalearth.s3.amazonaws.com/50m_physical/ne_50m_rivers_lake_centerlines.zip
  warnings.warn(f'Downloading: {url}', DownloadWarning)
/Users/carmen/Desktop/SIF_anomalies/venv/lib/python3.9/site-packages/cartopy/io/__init__.py:241: DownloadWarning: Downloading: https://naturalearth.s3.amazonaws.com/50m_physical/ne_50m_lakes.zip
  warnings.warn(f'Downloading: {url}', DownloadWarning)
No description has been provided for this image
/Users/carmen/Desktop/SIF_anomalies/venv/lib/python3.9/site-packages/cartopy/io/__init__.py:241: DownloadWarning: Downloading: https://naturalearth.s3.amazonaws.com/110m_physical/ne_110m_land.zip
  warnings.warn(f'Downloading: {url}', DownloadWarning)
/Users/carmen/Desktop/SIF_anomalies/venv/lib/python3.9/site-packages/cartopy/io/__init__.py:241: DownloadWarning: Downloading: https://naturalearth.s3.amazonaws.com/110m_cultural/ne_110m_admin_0_boundary_lines_land.zip
  warnings.warn(f'Downloading: {url}', DownloadWarning)
/Users/carmen/Desktop/SIF_anomalies/venv/lib/python3.9/site-packages/cartopy/io/__init__.py:241: DownloadWarning: Downloading: https://naturalearth.s3.amazonaws.com/110m_physical/ne_110m_coastline.zip
  warnings.warn(f'Downloading: {url}', DownloadWarning)
/Users/carmen/Desktop/SIF_anomalies/venv/lib/python3.9/site-packages/cartopy/io/__init__.py:241: DownloadWarning: Downloading: https://naturalearth.s3.amazonaws.com/110m_physical/ne_110m_rivers_lake_centerlines.zip
  warnings.warn(f'Downloading: {url}', DownloadWarning)
/Users/carmen/Desktop/SIF_anomalies/venv/lib/python3.9/site-packages/cartopy/io/__init__.py:241: DownloadWarning: Downloading: https://naturalearth.s3.amazonaws.com/110m_physical/ne_110m_lakes.zip
  warnings.warn(f'Downloading: {url}', DownloadWarning)
No description has been provided for this image

The following process creates more datapoints with less resolutions from the previously gathered data

In [7]:
def create_strided_downsamples(data, modulo):
    h, w = data.shape[-2], data.shape[-1]

    new_h = (h // modulo) * modulo
    new_w = (w // modulo) * modulo
    cropped = data[..., :new_h, :new_w]

    versions = []
    for row_mod in range(modulo):
        for col_mod in range(modulo):
            version = cropped[..., row_mod::modulo, col_mod::modulo]
            versions.append(version)

    return np.stack(versions, axis=1)
sif_strided = create_strided_downsamples(sif_monthly[0:5],10) 
print("sif_strided shape:", sif_strided.shape)

sif_08_2024_downsampled = sif_strided[0,2, 7, :, :] 
print("sif_08_2024_downsampled shape:", sif_08_2024_downsampled.shape) 

sif_patches = sif_strided.reshape(-1, 12, sif_strided.shape[-2], sif_strided.shape[-1])
sif_monthly_resized = sif_monthly.coarsen(latitude=10, longitude=10, boundary='trim').mean()

# Convert back to xarray DataArray for consistency
sif_patches = xr.DataArray(
    sif_patches,
    dims=["sample", "month", "latitude", "longitude"],
    coords={
        "sample": np.arange(sif_patches.shape[0]),
        "month": np.arange(1, 13),
        "latitude": sif_monthly_resized.latitude.values,
        "longitude": sif_monthly_resized.longitude.values,
    }
)

print("sif_patches shape:", sif_patches.shape)
print( np.isnan(sif_patches).any())

fig = plt.figure(figsize=(10, 6))
ax = plt.axes(projection=ccrs.PlateCarree())
ax.set_extent([lon_min, lon_max, lat_min, lat_max], crs=ccrs.PlateCarree())

ax.add_feature(cfeature.LAND, facecolor="lightgray", edgecolor="black", alpha=0.3)
ax.add_feature(cfeature.BORDERS, linewidth=0.5)
ax.add_feature(cfeature.COASTLINE, linewidth=0.5)
ax.add_feature(cfeature.RIVERS, alpha=0.5)
ax.add_feature(cfeature.LAKES, alpha=0.3)

xticks = np.linspace(lon_min, lon_max, 6)
yticks = np.linspace(lat_min, lat_max, 5)
ax.set_xticks(xticks, crs=ccrs.PlateCarree())
ax.set_yticks(yticks, crs=ccrs.PlateCarree())

im = ax.pcolormesh(
    np.linspace(lon_min, lon_max, sif_patches.shape[-1]),
    np.linspace(lat_min, lat_max, sif_patches.shape[-2]),
    sif_08_2024_downsampled,
    cmap="YlGn",
    shading="auto",
    transform=ccrs.PlateCarree()
)
cbar = plt.colorbar(im, ax=ax, orientation="vertical", pad=0.03 , shrink = 1.2)
cbar.set_label("SIF -  mW m⁻² nm⁻¹ sr⁻¹")
plt.xlabel("Longitude (°W to °E)")
plt.ylabel("Latitude (°S to °N)")
plt.show()

sif_patches_flat = sif_patches.to_numpy().flatten()

plt.figure(figsize=(10, 6))
plt.hist(sif_patches_flat, bins=100, color='blue', alpha=0.7)

percentile_5 = np.percentile(sif_patches_flat, 1)
percentile_95 = np.percentile(sif_patches_flat, 99)

plt.axvline(percentile_5, color='red', linestyle='--', label='1th Percentile')
plt.axvline(percentile_95, color='green', linestyle='--', label='99th Percentile')
plt.legend()
plt.xlabel("SIF Value - mW m⁻² nm⁻¹ sr⁻¹")
plt.ylabel("Frequency")
plt.grid(True)
plt.show()
sif_strided shape: (5, 100, 12, 91, 113)
sif_08_2024_downsampled shape: (91, 113)
sif_patches shape: (500, 12, 91, 113)
<xarray.DataArray ()> Size: 1B
array(False)
No description has been provided for this image
No description has been provided for this image

The following three cells define the autoencoder, train it and save it. Do not run them if you want to obtain the exact same results as I showed in my thesis.

If you want to reproduce the results that I obtained go 4 cells bellow and run the following ones as it will then use the saved autoencoder model already trained by me before.

In [8]:
input_sequences = sif_patches.values[..., np.newaxis]  
input_seq = Input(shape=(12, sif_patches.shape[2], sif_patches.shape[3], 1))
print(input_sequences.shape)

# # Encoder
# x = Conv3D(16, (3, 3, 3), activation='relu', padding='same')(input_seq)
# x = MaxPooling3D((2, 2, 2), padding='same')(x)
# x = Conv3D(8, (3, 3, 3), activation='relu', padding='same')(x)
# encoded = MaxPooling3D((2, 2, 2), padding='same',
#                        activity_regularizer=regularizers.l1(1e-5))(x)

# # Decoder (mirror of encoder)
# x = Conv3D(8, (3, 3, 3), activation='relu', padding='same')(encoded)
# x = UpSampling3D((2, 2, 2))(x)
# x = Conv3D(16, (3, 3, 3), activation='relu', padding='same')(x)
# x = UpSampling3D((2, 2, 2))(x)
# x = Cropping3D(((0, 0), (0,  x.shape[2] - input_seq.shape[2]), (0, x.shape[3] - input_seq.shape[3])))(x)
# decoded = Conv3D(1, (3, 3, 3), activation='relu', padding='same')(x)

# # # Autoencoder Model
# autoencoder2 = Model(input_seq, decoded)
# autoencoder2.compile(optimizer='adam', loss='mse')
# autoencoder2.summary()
(500, 12, 91, 113, 1)
In [9]:
# #np.random.seed(42)  #to get reproducible results
# autoencoder2.fit(input_sequences, input_sequences, 
#                 epochs=50,
#                 batch_size=30,
#                 validation_split=0.2,
#                 shuffle=True) 
In [10]:
# autoencoder2.save("autoencoder_sif_model.keras")
# print("Model saved to autoencoder_sif_model.keras")


# model_path = "autoencoder_sif_model.keras"
# if os.path.isfile(model_path) and os.path.getsize(model_path) > 0:
#     print(f"Model file '{model_path}' exists and is not empty.")
# else:
#     print(f"Model file '{model_path}' is missing or empty.")

# if 'autoencoder2' in locals() and hasattr(autoencoder2, 'history') and autoencoder2.history is not None:
#     history = autoencoder2.history
# else:
#     print("Training history not found. Please ensure you have the training history object.")
#     history = None

# if history is not None:
#     if hasattr(history, 'history'):
#         hist_dict = history.history
#     else:
#         hist_dict = history

#     plt.figure(figsize=(8, 5))
#     plt.plot(hist_dict['loss'], linewidth=0.7,label='Training Loss')
#     if 'val_loss' in hist_dict:
#         plt.plot(hist_dict['val_loss'], linewidth=0.7, label='Validation Loss')
#     plt.xlabel('Epoch')
#     plt.ylabel('Loss (MSE)')
#     plt.title('Autoencoder Training and Validation Loss')
#     plt.legend()
#     plt.grid(True)
#     plt.tight_layout()
#     plt.show()

With the line " autoencoder2 = load_model("autoencoder_sif_model.keras") " we are reproducing the results that I described in my thesis project. Run the cells from here to reproduce the same results

In [11]:
autoencoder2 = load_model("autoencoder_sif_model.keras")
In [12]:
encoded_layer = None
for layer in autoencoder2.layers:
    if layer.name == 'max_pooling3d_77':  
        encoded_layer = layer
        break
if encoded_layer is None:
    raise ValueError("Encoded layer not found in autoencoder2.")



reconstruct = autoencoder2.predict(input_sequences)
print("Reconstructed shape:", reconstruct.shape) 

sample_idx = 100  
fig, axes = plt.subplots(2, 5, figsize=(15, 6))
time_steps = [0, 3, 6, 9, 11]

vmin = min(sif_patches[sample_idx, t, :, :].min() for t in time_steps)
vmax = max(sif_patches[sample_idx, t, :, :].max() for t in time_steps)

for i, t in enumerate(time_steps):
    im0 = axes[0, i].imshow(reconstruct[sample_idx, t, :, :], cmap='viridis', origin='lower', vmin=vmin, vmax=vmax)
    axes[0, i].set_title(f'Reconstructed Month {t+1}')
    axes[0, i].axis('off')

    im1 = axes[1, i].imshow(sif_patches.values[sample_idx, t, :, :], cmap='viridis', origin='lower', vmin=vmin, vmax=vmax)
    axes[1, i].set_title(f'Input Month {t+1}')
    axes[1, i].axis('off')

fig.subplots_adjust(right=0.88)
cbar_ax = fig.add_axes([0.9, 0.15, 0.02, 0.7])
fig.colorbar(im0, cax=cbar_ax, label='SIF Value')

plt.show()
16/16 ━━━━━━━━━━━━━━━━━━━━ 5s 308ms/step
Reconstructed shape: (500, 12, 91, 113, 1)
No description has been provided for this image
In [13]:
input_means = np.mean(sif_patches.values, axis=(2, 3))  
recon_means = np.mean(reconstruct[..., 0], axis=(2, 3))  
sample = 100
plt.figure(figsize=(10, 5))
plt.plot(range(12), input_means[sample], marker='o', label='Input Sample 100')
plt.plot(range(12), recon_means[sample], marker='o', label='Reconstructed (CAE)')
plt.xticks(range(12), months, rotation=45)
plt.xlabel('Month')
plt.ylabel('Mean SIF Value - mW m⁻² nm⁻¹ sr⁻¹')
plt.legend()
plt.show()
No description has been provided for this image
In [14]:
sif_monthly_resized = sif_monthly.coarsen(latitude=10, longitude=10, boundary='trim').mean()

plt.imshow(sif_monthly_resized[0, 0], cmap='YlGn', origin = 'lower',  extent=[lon_min, lon_max, lat_min, lat_max])
plt.show()

sif_mon = sif_monthly_resized.values
original = sif_mon[..., np.newaxis]  

# Start tracking time and energy
tracker = EmissionsTracker()
tracker.start()
start_time = time.time()

reconstructed = autoencoder2.predict(original)

# Stop tracking time and energy
end_time = time.time()
emissions = tracker.stop()

print(f"Time taken: {end_time - start_time:.10f} seconds")
print(f"Energy consumed: {emissions:.10f} kWh")

print("Reconstructed shape:", reconstructed.shape)  # Should be (6, 12, 91, 113, 1)

reconstructed = reconstructed[..., 0]
original = original[..., 0]
print("Original shape:", original.shape, "Reconstructed shape:", reconstructed.shape)

reconstructed = xr.DataArray(
    reconstructed,
    dims=["year", "month", "latitude", "longitude"],
    coords={
        "year": sif_monthly_resized.year.values,
        "month": sif_monthly_resized.month.values,
        "latitude": sif_monthly_resized.latitude.values,
        "longitude": sif_monthly_resized.longitude.values,
    }
)

original = xr.DataArray(
    original,
    dims=["year", "month", "latitude", "longitude"],
    coords={
        "year": sif_monthly_resized.year.values,
        "month": sif_monthly_resized.month.values,
        "latitude": sif_monthly_resized.latitude.values,
        "longitude": sif_monthly_resized.longitude.values,
    }
)

reconstruction_error_pixel = (original.sel(year=slice(2019, 2023)) -
                              reconstructed.sel(year=slice(2019, 2023))) ** 2  #MSE, leave 2024 out to compute threshold
print("Reconstruction error shape:", reconstruction_error_pixel.shape) 

#this threshold is for general areas, not pixel per pixel 
thresholds_per_month = reconstruction_error_pixel.quantile(0.99, dim=["year", "latitude", "longitude"])

months = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 
          'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']

mean_errors = reconstruction_error_pixel.mean(dim=["year", "latitude", "longitude"])

plt.figure(figsize=(12, 6))
plt.plot(mean_errors["month"], mean_errors, 'b-', label='Mean Error', marker='o')
plt.plot(thresholds_per_month["month"], thresholds_per_month, 'r--', label='99th Percentile Threshold')
plt.xticks(range(1, 13), months)
plt.xlabel('Month')
plt.ylabel('Reconstruction Error')
plt.title('Monthly Reconstruction Error and 99th Percentile Threshold')
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.show()


anomalies_month = (reconstruction_error_pixel > thresholds_per_month).astype(int)
print(anomalies_month.shape)  

extended_months = [f"{months[m-1]}-{y}" 
                   for y in reconstructed.year.values 
                   for m in reconstructed.month.values]


climatology = sif_monthly_resized.sel(year=years[:5])  # 2019–2023
climatology_mean = climatology.mean(dim="year")

climatology_24_months = xr.concat([climatology_mean.mean(dim=["latitude","longitude"])] * 6, dim="year").values.flatten()  
climatology_AE_mean = reconstructed.mean(dim=["year", "latitude", "longitude"])
climatology_AE = xr.concat([climatology_AE_mean] * 6, dim="year")
climatology_AE = climatology_AE.assign_coords(year=np.arange(2019, 2019+6)).values.flatten()
AE_reconstruction = reconstructed.mean(dim=["latitude", "longitude"]).values.flatten()
sif_measured_24_months = sif_monthly_resized.mean(dim=["latitude","longitude"]).values.flatten()


plt.figure(figsize=(14, 6))
plt.plot(extended_months, climatology_AE, label="Climatology (Expected by AE)", color="yellow", marker="o")
plt.plot(extended_months, climatology_24_months, label="Climatology (Mean of all data)", color="blue", marker="o")
plt.plot(extended_months, AE_reconstruction, label="Reconstructed SIF (Autoencoder)", color="red", marker="o")
plt.plot(extended_months, sif_measured_24_months, label="Measured SIF (Satellite)", color="green", marker="o")
plt.xlabel("Month (2023-2024)")
plt.ylabel("SIF Value")
plt.title("Climatology vs Measured SIF (2023-2024)")
plt.xticks(rotation=45)
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.show()

plt.figure(figsize=(12, 6))
plt.plot(months, climatology_AE_mean, label='Climatology Mean (2019-2023)', color='blue')
plt.plot(months, np.nanmean(sif_monthly_resized[5], axis=(1, 2)), label='2024', color='red', marker='o')
plt.xlabel('Month')
plt.ylabel('SIF Value')
plt.title('Monthly SIF Climatology (2019-2023) with 2024')
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.show()
No description has been provided for this image
[codecarbon WARNING @ 15:35:54] Multiple instances of codecarbon are allowed to run at the same time.
[codecarbon WARNING @ 15:35:54] Error while trying to count physical CPUs: [Errno 2] No such file or directory: 'lscpu'. Defaulting to 1.
[codecarbon INFO @ 15:35:54] [setup] RAM Tracking...
[codecarbon INFO @ 15:35:54] [setup] CPU Tracking...
[codecarbon WARNING @ 15:35:54] We saw that you have a Apple M3 Pro but we don't know it. Please contact us.
[codecarbon WARNING @ 15:35:54] No CPU tracking mode found. Falling back on estimation based on TDP for CPU. 
 Mac OS and ARM processor detected: Please enable PowerMetrics sudo to measure CPU

[codecarbon INFO @ 15:35:54] CPU Model on constant consumption mode: Apple M3 Pro
[codecarbon WARNING @ 15:35:54] No CPU tracking mode found. Falling back on CPU constant mode.
[codecarbon INFO @ 15:35:54] [setup] GPU Tracking...
[codecarbon INFO @ 15:35:54] No GPU found.
[codecarbon INFO @ 15:35:54] The below tracking methods have been set up:
                RAM Tracking Method: RAM power estimation model
                CPU Tracking Method: global constant
                GPU Tracking Method: Unspecified
            
[codecarbon INFO @ 15:35:54] >>> Tracker's metadata:
[codecarbon INFO @ 15:35:54]   Platform system: macOS-14.6-arm64-arm-64bit
[codecarbon INFO @ 15:35:54]   Python version: 3.9.6
[codecarbon INFO @ 15:35:54]   CodeCarbon version: 3.0.8
[codecarbon INFO @ 15:35:54]   Available RAM : 18.000 GB
[codecarbon INFO @ 15:35:54]   CPU count: 11 thread(s) in 1 physical CPU(s)
[codecarbon INFO @ 15:35:54]   CPU model: Apple M3 Pro
[codecarbon INFO @ 15:35:54]   GPU count: None
[codecarbon INFO @ 15:35:54]   GPU model: None
[codecarbon INFO @ 15:35:56] Emissions data (if any) will be saved to file /Users/carmen/Desktop/SIF_anomalies/emissions.csv
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 80ms/step
[codecarbon INFO @ 15:35:56] Energy consumed for RAM : 0.000000 kWh. RAM Power : 6.0 W
[codecarbon INFO @ 15:35:56] Delta energy consumed for CPU with constant : 0.000001 kWh, power : 42.5 W
[codecarbon INFO @ 15:35:56] Energy consumed for All CPU : 0.000001 kWh
[codecarbon INFO @ 15:35:56] 0.000002 kWh of electricity and 0.000000 L of water were used since the beginning.
[codecarbon WARNING @ 15:35:56] The CSV format has changed, backing up old emission file.
Time taken: 0.1195809841 seconds
Energy consumed: 0.0000002836 kWh
Reconstructed shape: (6, 12, 91, 113, 1)
Original shape: (6, 12, 91, 113) Reconstructed shape: (6, 12, 91, 113)
Reconstruction error shape: (5, 12, 91, 113)
No description has been provided for this image
(5, 12, 91, 113)
No description has been provided for this image
No description has been provided for this image
In [15]:
# Compute threshold for each pixel and each month without 2024
thresholds_per_pixel = reconstruction_error_pixel.sel(year=slice(2019, 2023)).quantile(0.99, dim="year")

# Including 2024
reconstruction_error_pixel_all = (original - reconstructed) ** 2
print("Reconstruction error shape:", reconstruction_error_pixel_all.shape)

anomalies_pixel = reconstruction_error_pixel_all > thresholds_per_pixel

anomalies_da = xr.DataArray(
    anomalies_pixel,
    dims=["year", "month", "latitude", "longitude"],
    coords={
        "year": original.year,
        "month": original.month,
        "latitude": original.latitude,
        "longitude": original.longitude,
    }
)

anomaly_list_ds = anomalies_da.where(anomalies_da, drop=True).to_dataframe(name="anomaly").reset_index()
anomaly_list_ds["error"] = reconstruction_error_pixel_all.where(anomalies_da).to_dataframe(name="error").reset_index()["error"]

fig, axs = plt.subplots(3, 4, figsize=(18, 12), subplot_kw={'projection': ccrs.PlateCarree()})
fig.suptitle("CAE Anomaly Maps for All Months of 2024", fontsize=18)

for month_idx in range(12):
    ax = axs[month_idx // 4, month_idx % 4]
    ax.set_extent([lon_min, lon_max, lat_min, lat_max], crs=ccrs.PlateCarree())
    ax.add_feature(cfeature.LAND, facecolor="lightgray", edgecolor="black", alpha=0.3)
    ax.add_feature(cfeature.BORDERS, linewidth=0.5)
    ax.add_feature(cfeature.COASTLINE, linewidth=0.5)
    ax.add_feature(cfeature.RIVERS, alpha=0.5)

    anomalies_da.sel(year=2024, month=month_idx + 1).plot.imshow(
        ax=ax, transform=ccrs.PlateCarree(), cmap="Reds", add_colorbar=False
    )
    ax.set_title(f"{months[month_idx]} 2024")
    ax.axis('off')

legend_elements = [
    Patch(facecolor='darkred', edgecolor='darkred', label='Anomaly'),
    Patch(facecolor='white', edgecolor='black', label='Not Anomaly')
]
ax.legend(handles=legend_elements, loc='lower right', frameon=True)
plt.show()



anomaly_percentages_2024 = []
for month in range(1, 13):
    mask = anomalies_da.sel(year=2024, month=month)
    total = mask.notnull().sum().item()
    anomalies = mask.sum().item()
    percent = (anomalies / total) * 100 if total > 0 else np.nan
    anomaly_percentages_2024.append(percent)

anomaly_percentages_2024_cnn_df = pd.DataFrame({
    'Month': months,
    'Anomaly % (2024)': np.round(anomaly_percentages_2024, 2)
})

print("Anomaly Percentages Table for 2024:")
print(anomaly_percentages_2024_cnn_df)

plt.hist(reconstruction_error_pixel_all.sel(year=2024, month=t).values.flatten(), bins=100)
plt.axvline(thresholds_per_month.sel(month=t).item(), color='r', linestyle='--', label='99th percentile')
plt.title(f"Reconstruction Errors for Month {t}")
plt.legend()
plt.show()
Reconstruction error shape: (6, 12, 91, 113)
No description has been provided for this image
Anomaly Percentages Table for 2024:
   Month  Anomaly % (2024)
0    Jan             25.83
1    Feb             17.83
2    Mar             21.09
3    Apr             17.10
4    May             26.60
5    Jun             24.23
6    Jul             20.09
7    Aug             39.45
8    Sep             40.79
9    Oct             20.83
10   Nov             21.75
11   Dec             43.37
No description has been provided for this image
In [ ]:
difference = original - reconstructed

fig, axs = plt.subplots(3, 4, figsize=(18, 12), subplot_kw={'projection': ccrs.PlateCarree()})
fig.suptitle("Difference (Original - Reconstructed) for Each Month of 2024", fontsize=18)

vmax = np.nanmax(abs(difference.values))
vmin = -vmax

for month_idx in range(12):
    ax = axs[month_idx // 4, month_idx % 4]
    ax.set_extent([lon_min, lon_max, lat_min, lat_max], crs=ccrs.PlateCarree())
    ax.add_feature(cfeature.LAND, facecolor="lightgray", edgecolor="black", alpha=0.3)
    ax.add_feature(cfeature.BORDERS, linewidth=0.5)
    ax.add_feature(cfeature.COASTLINE, linewidth=0.5)
    ax.add_feature(cfeature.RIVERS, alpha=0.5)

    im = difference.isel(year=5, month=month_idx).plot.imshow(
        ax=ax,
        cmap="RdBu",
        add_colorbar=False,
        transform=ccrs.PlateCarree(),
        vmin=vmin,
        vmax=vmax
    )
    ax.set_title(f"{months[month_idx]} 2024", fontsize=10)

fig.subplots_adjust(right=0.88, wspace=0.2, hspace=0.25)  
cbar_ax = fig.add_axes([0.90, 0.25, 0.02, 0.5])  
cbar = fig.colorbar(im, cax=cbar_ax, orientation="vertical")
cbar.set_label("Difference (Original - Reconstructed)", fontsize=12)

plt.show()
No description has been provided for this image
In [33]:
reconstruction_2024 = reconstruction_error_pixel.sel(year=slice(2023, 2024))

# Flatten and drop NaNs
values_2024 = reconstruction_2024.values.flatten()
values_2024 = values_2024[np.isfinite(values_2024)]

# Define thresholds from 0.0 to 1.0
thresholds = np.linspace(0.0, 1.0, 101)

# Compute percentage of anomalies for each threshold
percentages = [(np.sum(values_2024 > t) / len(values_2024)) * 100 for t in thresholds]

# --- Plot ---
plt.figure(figsize=(10, 6))
plt.bar(thresholds, percentages, width=0.01, color='royalblue', alpha=0.7, edgecolor='black')
plt.xlabel("Reconstruction Error Threshold", fontsize=12)
plt.ylabel("Percentage of Anomalies (%)", fontsize=12)
plt.title("Percentage of Anomalies vs. Reconstruction Threshold (2024)", fontsize=14)
plt.grid(True, linestyle='--', alpha=0.5)
plt.tight_layout()
plt.show()
No description has been provided for this image

Following two cells compute anomalies with the z-score method

In [18]:
climatology_mean = sif_patches.mean(dim="sample")
climatology_std = sif_patches.std(dim="sample")
print("Climatology mean shape:", climatology_mean.shape, "Climatology std shape:", climatology_std.shape, sif_monthly_resized.sel(year=years[5:]).squeeze("year").shape)

z_scores = (sif_monthly_resized.sel(year=years[5:]).squeeze("year") - climatology_mean) / climatology_std

anomaly_z = z_scores.where(np.abs(z_scores) > 2, other=0).astype(bool)

months_labels = ['Jan','Feb','Mar','Apr','May','Jun',
                 'Jul','Aug','Sep','Oct','Nov','Dec']

fig, axs = plt.subplots(3, 4, figsize=(18, 12), subplot_kw={'projection': ccrs.PlateCarree()})
fig.suptitle("Z-score Anomaly Maps for All Months of 2024 (|z| > 2)", fontsize=18)

anomaly_2024 = anomaly_z
for month_idx in range(12):
    ax = axs[month_idx // 4, month_idx % 4]
    ax.set_extent([lon_min, lon_max, lat_min, lat_max], crs=ccrs.PlateCarree())
    
    ax.add_feature(cfeature.LAND, facecolor="lightgray", edgecolor="black", alpha=0.3)
    ax.add_feature(cfeature.BORDERS, linewidth=0.5)
    ax.add_feature(cfeature.COASTLINE, linewidth=0.5)
    ax.add_feature(cfeature.RIVERS, alpha=0.5)
    
    anomaly_2024.isel(month=month_idx).plot.imshow(
        ax=ax,
        transform=ccrs.PlateCarree(),
        cmap="Reds",
        add_colorbar=False
    )
    
    ax.set_title(f"{months_labels[month_idx]} 2024")
    ax.axis("off")


legend_elements = [
    Patch(facecolor="darkred", edgecolor="darkred", label="Z-score Anomaly"),
    Patch(facecolor="white", edgecolor="black", label="Not Anomaly"),
]
axs[2, 3].legend(handles=legend_elements, loc="lower right", frameon=True)

plt.tight_layout()
plt.show()
Climatology mean shape: (12, 91, 113) Climatology std shape: (12, 91, 113) (12, 91, 113)
No description has been provided for this image
In [19]:
z_mapped = xr.where(z_scores > 2, 1,
             xr.where(z_scores < -2, -1, 0))

fig, axs = plt.subplots(3, 4, figsize=(18, 12),
                        subplot_kw={'projection': ccrs.PlateCarree()})
fig.suptitle("Z-score Anomaly Maps for All Months of 2024 (|z| > 2)", fontsize=18)

for month_idx in range(12):
    ax = axs[month_idx // 4, month_idx % 4]
    ax.set_extent([lon_min, lon_max, lat_min, lat_max], crs=ccrs.PlateCarree())
    
    ax.add_feature(cfeature.LAND, facecolor="lightgray", edgecolor="black", alpha=0.3)
    ax.add_feature(cfeature.BORDERS, linewidth=0.5)
    ax.add_feature(cfeature.COASTLINE, linewidth=0.5)
    ax.add_feature(cfeature.RIVERS, alpha=0.5)
    
    z_mapped.isel(month=month_idx).plot.imshow(
        ax=ax,
        transform=ccrs.PlateCarree(),
        cmap="RdBu",  # red for positive, blue for negative
        add_colorbar=False,
        vmin=-1, vmax=1
    )
    
    ax.set_title(f"{months_labels[month_idx]} 2024")
    ax.axis("off")

legend_elements = [
    Patch(facecolor="darkred", edgecolor="darkred", label="Z > 2 (Positive Anomaly)"),
    Patch(facecolor="darkblue", edgecolor="darkblue", label="Z < -2 (Negative Anomaly)"),
    Patch(facecolor="white", edgecolor="black", label="Normal (|Z| ≤ 2)"),
]
axs[2, 3].legend(handles=legend_elements, loc="lower right", frameon=True)

plt.tight_layout()
plt.show()
No description has been provided for this image
In [ ]:
print("Anomaly mask shape:", anomaly_z.shape)
'anomaly_percentages_z' = []


year_percentages = []
for month_idx in range(12):
    mask = anomaly_z[month_idx]
    total = np.isfinite(mask).sum() 
    anomalies = np.nansum(mask)
    percent = (anomalies / total) * 100 if total > 0 else np.nan
    year_percentages.append(percent)
anomaly_percentages_z.append(year_percentages)


months_short = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
anomaly_z_df = pd.DataFrame(np.array(anomaly_percentages_z[0]).T, columns=[2024], index=months_short)

print("Percentage of anomalies in 2024 (z-score method):")
print(anomaly_z_df.round(3))
Anomaly mask shape: (12, 91, 113)
Percentage of anomalies in 2024 (z-score method):
       2024
Jan   9.161
Feb   2.674
Mar   6.837
Apr   3.161
May   3.151
Jun   0.953
Jul   0.613
Aug  16.279
Sep  35.807
Oct   3.734
Nov   3.763
Dec  25.304
In [36]:
zvalues_2024 = z_scores.values.flatten()
zvalues_2024 = zvalues_2024[np.isfinite(zvalues_2024)]

# Define thresholds from 0.0 to 1.0
thresholds = np.linspace(0.0, 5.0, 101)

# Compute percentage of anomalies for each threshold
percentages = [(np.sum(zvalues_2024 > t) / len(zvalues_2024)) * 100 for t in thresholds]

# --- Plot ---
plt.figure(figsize=(10, 6))
plt.bar(thresholds, percentages, width=0.01, color='royalblue', alpha=0.7, edgecolor='black')
plt.xlabel("Z Scores Error Threshold", fontsize=12)
plt.ylabel("Percentage of Anomalies (%)", fontsize=12)
plt.title("Percentage of Anomalies vs. Z Score Threshold (2024)", fontsize=14)
plt.grid(True, linestyle='--', alpha=0.5)
plt.tight_layout()
plt.show()
No description has been provided for this image
In [ ]:
reconstruction_2024 = reconstruction_error_pixel.sel(year=slice(2023, 2024))

# Flatten and drop NaNs
values_2024 = reconstruction_2024.values.flatten()
values_2024 = values_2024[np.isfinite(values_2024)]

# Define thresholds from 0.0 to 1.0
thresholds = np.linspace(0.0, 1.0, 101)

# Compute percentage of anomalies for each threshold
percentages = [(np.sum(values_2024 > t) / len(values_2024)) * 100 for t in thresholds]

# --- Plot ---
plt.figure(figsize=(10, 6))
plt.bar(thresholds, percentages, width=0.01, color='royalblue', alpha=0.7, edgecolor='black')
plt.xlabel("Reconstruction Error Threshold", fontsize=12)
plt.ylabel("Percentage of Anomalies (%)", fontsize=12)
plt.title("Percentage of Anomalies vs. Reconstruction Threshold (2024)", fontsize=14)
plt.grid(True, linestyle='--', alpha=0.5)
plt.tight_layout()
plt.show()
In [30]:
####FIX 
reconstruction_error_2024 = reconstruction_error_pixel_all.sel(year=2024).clip(max=1)

fig, axs = plt.subplots(3, 4, figsize=(18, 12), subplot_kw={'projection': ccrs.PlateCarree()})
fig.suptitle("Reconstruction Error Maps for All Months of 2024", fontsize=18)

for month_idx in range(12):
    ax = axs[month_idx // 4, month_idx % 4]
    ax.set_extent([lon_min, lon_max, lat_min, lat_max], crs=ccrs.PlateCarree())
    ax.add_feature(cfeature.LAND, facecolor="lightgray", edgecolor="black", alpha=0.3)
    ax.add_feature(cfeature.BORDERS, linewidth=0.5)
    ax.add_feature(cfeature.COASTLINE, linewidth=0.5)
    ax.add_feature(cfeature.RIVERS, alpha=0.5)

    im = reconstruction_error_2024.isel(month=month_idx).plot.imshow(
        ax=ax,
        cmap="Blues",
        origin="lower",
        add_colorbar=False,
        transform=ccrs.PlateCarree(),
    )
    ax.set_title(f"{months[month_idx]} 2024")
    ax.axis("off")
    
fig.subplots_adjust(right=0.88, wspace=0.2, hspace=0.25)  # leave space for colorbar
cbar_ax = fig.add_axes([0.90, 0.25, 0.02, 0.5])  # [left, bottom, width, height]
cbar = fig.colorbar(im, cax=cbar_ax, orientation="vertical")
cbar.set_label("Reconstruction Error", fontsize=12)

plt.show()



fig, axs = plt.subplots(3, 4, figsize=(18, 12), subplot_kw={'projection': ccrs.PlateCarree()})
fig.suptitle("Clipped Z-scores for All Months of 2024", fontsize=18)

for month_idx in range(12):
    ax = axs[month_idx // 4, month_idx % 4]
    ax.set_extent([lon_min, lon_max, lat_min, lat_max], crs=ccrs.PlateCarree())
    ax.add_feature(cfeature.LAND, facecolor="lightgray", edgecolor="black", alpha=0.3)
    ax.add_feature(cfeature.BORDERS, linewidth=0.5)
    ax.add_feature(cfeature.COASTLINE, linewidth=0.5)
    ax.add_feature(cfeature.RIVERS, alpha=0.5)

    # Plot clipped z-scores for the month
    im = z_scores.isel(month=month_idx).plot.imshow(
        ax=ax,
        cmap="Blues",
        origin="lower",
        add_colorbar=False,
        transform=ccrs.PlateCarree(),
    )
    ax.set_title(f"{months[month_idx]} 2024")

fig.subplots_adjust(right=0.88, wspace=0.2, hspace=0.25)  # leave space for colorbar
cbar_ax = fig.add_axes([0.90, 0.25, 0.02, 0.5])  # [left, bottom, width, height]
cbar = fig.colorbar(im, cax=cbar_ax, orientation="vertical")
cbar.set_label("Clipped Z-score [-6, 6]", fontsize=12)

plt.show()
No description has been provided for this image
No description has been provided for this image

Here starts the comparison of anomaly results between the two methods, as well as comparing the results with the droughts recorded by Monitor de Secas (MS) (see master thesis for more information). You will observe in the following cells both a visual and numerical comparison.

In [22]:
mean_anomaly_zscore_2024 = np.array(anomaly_percentages_z[0]) 
mean_anomaly_cae_2024 = np.array(anomaly_percentages_2024)  

plt.figure(figsize=(10, 6))
bar_width = 0.4
x = np.arange(len(months))

plt.bar(x - bar_width/2, mean_anomaly_cae_2024, width=bar_width, label='CAE Anomalies (%)', color='red', alpha=0.7)
plt.bar(x + bar_width/2, mean_anomaly_zscore_2024, width=bar_width, label='Z-score Anomalies (%)', color='blue', alpha=0.7)

plt.xticks(x, months, rotation=45)
plt.ylabel('Anomaly Percentage (%)')
plt.title('Monthly Anomaly Percentage Comparison (2024)')
plt.legend()
plt.tight_layout()
plt.show()
No description has been provided for this image
In [23]:
cnn_anomalies_2024 = anomalies_pixel[5] 
zscore_anomalies_2024 = anomaly_z

month_idx = 5 #september

zscore_anomalies_2024_resized = np.empty((12, 91, 113), dtype=bool)
for m in range(12):
    zscore_anomalies_2024_resized[m] = resize(
        zscore_anomalies_2024[m].astype(float),
        (91, 113),
        order=0,  
        preserve_range=True,
        anti_aliasing=False
    ).astype(bool)

#secas image 
secas_septem_img = mpimg.imread('pic/secas_sept.png')

fig = plt.figure(figsize=(24, 6))

proj = ccrs.PlateCarree()
extent = [lon_min, lon_max, lat_min, lat_max]

# CNN-based anomaly map
ax0 = plt.subplot(1, 3, 1, projection=proj)
ax0.set_extent(extent, crs=proj)
ax0.add_feature(cfeature.LAND, facecolor="lightgray", edgecolor="black", alpha=0.3)
ax0.add_feature(cfeature.BORDERS, linewidth=0.7)
ax0.add_feature(cfeature.COASTLINE, linewidth=0.5)
ax0.add_feature(cfeature.RIVERS, alpha=0.5)
im0 = ax0.imshow(cnn_anomalies_2024[month_idx], cmap='Reds', origin='lower', extent=extent, transform=proj)
ax0.set_title("CNN (Autoencoder) Anomalies")
ax0.axis('off')

# z-score-based anomaly map
ax1 = plt.subplot(1, 3, 2, projection=proj)
ax1.set_extent(extent, crs=proj)
ax1.add_feature(cfeature.LAND, facecolor="lightgray", edgecolor="black", alpha=0.3)
ax1.add_feature(cfeature.BORDERS, linewidth=0.7)
ax1.add_feature(cfeature.COASTLINE, linewidth=0.5)
ax1.add_feature(cfeature.RIVERS, alpha=0.5)
im1 = ax1.imshow(zscore_anomalies_2024_resized[month_idx], cmap='Reds', origin='lower', extent=extent, transform=proj)
ax1.set_title("Z-score Anomalies")
ax1.axis('off')

# SECA's October image
ax2 = plt.subplot(1, 3, 3, projection=proj)
ax2.set_extent(extent, crs=proj)
im2 = ax2.imshow(secas_septem_img, extent=extent, transform=proj)
ax2.set_title("SECA's Drought")
ax2.axis('off')


plt.show()
No description has been provided for this image
In [ ]:
secas_sept_path = os.path.join('pic', 'secas_sept.png')
secas_images = sorted(glob.glob('secas_*.png') + glob.glob(os.path.join('pic', 'secas_*.png')))

# idea is to create a drought mask for each image based on the white pixels
drought_masks = {}
for img_path in secas_images:
    imga = mpimg.imread(img_path)
    img = cv2.cvtColor(imga, cv2.COLOR_BGR2RGB)
    target_shape = [113, 91]
    img_rgb_resized = np.flipud(cv2.resize(img, target_shape, interpolation=cv2.INTER_NEAREST))
    white_threshold = 0.3
    white_pixels = np.all(img_rgb_resized > white_threshold, axis=-1)
    drought_mask = (~white_pixels).astype(int)
    drought_masks[img_path] = drought_mask
    print(f"{img_path}: unique values {np.unique(drought_mask)}, shape {drought_mask.shape}")
imga = mpimg.imread(secas_sept_path)
drought_mask = drought_masks[secas_sept_path]
pic/secas_apr.png: unique values [0 1], shape (91, 113)
pic/secas_aug.png: unique values [0 1], shape (91, 113)
pic/secas_dec.png: unique values [0 1], shape (91, 113)
pic/secas_feb.png: unique values [0 1], shape (91, 113)
pic/secas_jan.png: unique values [0 1], shape (91, 113)
pic/secas_jul.png: unique values [0 1], shape (91, 113)
pic/secas_jun.png: unique values [0 1], shape (91, 113)
pic/secas_mar.png: unique values [0 1], shape (91, 113)
pic/secas_may.png: unique values [0 1], shape (91, 113)
pic/secas_nov.png: unique values [0 1], shape (91, 113)
pic/secas_oct.png: unique values [0 1], shape (91, 113)
pic/secas_sept.png: unique values [0 1], shape (91, 113)
In [25]:
# plot only brazil's anomalies for selected months: June (5), August (7), September (8), October (9), November (10), December (11)
selected_months = [5, 7, 8, 9, 10, 11]
fig, axs = plt.subplots(2, 6, figsize=(22, 7))

for i, m_idx in enumerate(selected_months):
    masked_cnn = np.where(drought_mask == 1, cnn_anomalies_2024[m_idx], 0)
    axs[0, i].imshow(masked_cnn, cmap='Reds', origin='lower', extent=[lon_min, lon_max, lat_min, lat_max])
    axs[0, i].set_title(f'CNN Anomaly - {months[m_idx]}')
    axs[0, i].axis('off')
    
    masked_zscore = np.where(drought_mask == 1, zscore_anomalies_2024_resized[m_idx], 0)
    axs[1, i].imshow(masked_zscore, cmap='Reds', origin='lower', extent=[lon_min, lon_max, lat_min, lat_max])
    axs[1, i].set_title(f'Z-score Anomaly - {months[m_idx]}')
    axs[1, i].axis('off')

plt.tight_layout()
plt.show()
No description has been provided for this image
In [ ]:
selected_months = [4, 5, 7, 8, 9, 10, 11]
dice_scores = []
iou_scores = []

for m_idx in selected_months:
    masked_anomaly_cnn = np.where(drought_mask == 1, cnn_anomalies_2024[m_idx], 0)
    intersection = np.sum(masked_anomaly_cnn * drought_mask)
    total_pixels = np.sum(masked_anomaly_cnn) + np.sum(drought_mask)
    dice = 1.0 if total_pixels == 0 else 2 * intersection / total_pixels
    dice_scores.append(dice)

    intersection_iou = np.sum(cnn_anomalies_2024[m_idx] * drought_mask)
    union = np.sum((cnn_anomalies_2024[m_idx] + drought_mask) > 0)
    iou = 1.0 if union == 0 else intersection_iou / union
    iou_scores.append(iou)

for i, m_idx in enumerate(selected_months):
    print(f"{months[m_idx]}: Dice = {dice_scores[i]:.4f}, IoU = {iou_scores[i]:.4f}")
May: Dice = 0.3572, IoU = 0.1384
Jun: Dice = 0.3427, IoU = 0.1368
Aug: Dice = 0.7487, IoU = 0.3807
Sep: Dice = 0.7914, IoU = 0.4213
Oct: Dice = 0.3832, IoU = 0.1717
Nov: Dice = 0.3145, IoU = 0.1279
Dec: Dice = 0.6730, IoU = 0.2851
In [ ]:
selected_months = [4,5, 7, 8, 9, 10, 11]
dice_scores_z = []
iou_scores_z = []

for m_idx in selected_months:
    masked_anomaly_z = np.where(drought_mask == 1, zscore_anomalies_2024_resized[m_idx], 0)
    intersection = np.sum(masked_anomaly_z * drought_mask)
    total_pixels = np.sum(masked_anomaly_z) + np.sum(drought_mask)
    dice = 1.0 if total_pixels == 0 else 2 * intersection / total_pixels
    dice_scores_z.append(dice)

    intersection_iou = np.sum(zscore_anomalies_2024_resized[m_idx] * drought_mask)
    union = np.sum((zscore_anomalies_2024_resized[m_idx] + drought_mask) > 0)
    iou = 1.0 if union == 0 else intersection_iou / union
    iou_scores_z.append(iou)

for i, m_idx in enumerate(selected_months):
    print(f"{months[m_idx]}: Dice = {dice_scores_z[i]:.4f}, IoU = {iou_scores_z[i]:.4f}")
May: Dice = 0.0126, IoU = 0.0058
Jun: Dice = 0.0279, IoU = 0.0139
Aug: Dice = 0.4833, IoU = 0.2737
Sep: Dice = 0.7419, IoU = 0.4006
Oct: Dice = 0.0834, IoU = 0.0408
Nov: Dice = 0.1162, IoU = 0.0588
Dec: Dice = 0.4607, IoU = 0.2063

The following cell makes a plot with all the droughts recorded per month of 2024 out of the images provided my the Monitor de Secas (MS).

In [28]:
month_names = ['jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul', 'aug', 'sept', 'oct', 'nov', 'dec']
secas_pngs = [os.path.join('pic', f'secas_{m}.png') for m in month_names]


fig, axs = plt.subplots(4, 4, figsize=(16, 16))
for idx, ax in enumerate(axs.flat):
    if idx < len(secas_pngs):
        img_path = secas_pngs[idx]
        if os.path.exists(img_path):
            img = mpimg.imread(img_path)
            ax.imshow(img)
            ax.set_title(month_names[idx].capitalize())
        else:
            ax.axis('off')
    else:
        ax.axis('off')
    ax.axis('off')
plt.tight_layout()
plt.show()
No description has been provided for this image

Here starts the same study but comparing it with the drought dataset: SPEI

In [29]:
ds = xr.open_dataset("spei01.nc")
print(ds)
---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
File ~/Desktop/SIF_anomalies/venv/lib/python3.9/site-packages/xarray/backends/file_manager.py:211, in CachingFileManager._acquire_with_cache_info(self, needs_lock)
    210 try:
--> 211     file = self._cache[self._key]
    212 except KeyError:

File ~/Desktop/SIF_anomalies/venv/lib/python3.9/site-packages/xarray/backends/lru_cache.py:56, in LRUCache.__getitem__(self, key)
     55 with self._lock:
---> 56     value = self._cache[key]
     57     self._cache.move_to_end(key)

KeyError: [<class 'netCDF4._netCDF4.Dataset'>, ('/Users/carmen/Desktop/SIF_anomalies/spei01.nc',), 'r', (('clobber', True), ('diskless', False), ('format', 'NETCDF4'), ('persist', False)), '5301ee5f-e76f-4cc6-b89b-0789ecbf82ce']

During handling of the above exception, another exception occurred:

FileNotFoundError                         Traceback (most recent call last)
Cell In[29], line 1
----> 1 ds = xr.open_dataset("spei01.nc")
      2 print(ds)

File ~/Desktop/SIF_anomalies/venv/lib/python3.9/site-packages/xarray/backends/api.py:588, in open_dataset(filename_or_obj, engine, chunks, cache, decode_cf, mask_and_scale, decode_times, decode_timedelta, use_cftime, concat_characters, decode_coords, drop_variables, inline_array, chunked_array_type, from_array_kwargs, backend_kwargs, **kwargs)
    576 decoders = _resolve_decoders_kwargs(
    577     decode_cf,
    578     open_backend_dataset_parameters=backend.open_dataset_parameters,
   (...)
    584     decode_coords=decode_coords,
    585 )
    587 overwrite_encoded_chunks = kwargs.pop("overwrite_encoded_chunks", None)
--> 588 backend_ds = backend.open_dataset(
    589     filename_or_obj,
    590     drop_variables=drop_variables,
    591     **decoders,
    592     **kwargs,
    593 )
    594 ds = _dataset_from_backend_dataset(
    595     backend_ds,
    596     filename_or_obj,
   (...)
    606     **kwargs,
    607 )
    608 return ds

File ~/Desktop/SIF_anomalies/venv/lib/python3.9/site-packages/xarray/backends/netCDF4_.py:645, in NetCDF4BackendEntrypoint.open_dataset(self, filename_or_obj, mask_and_scale, decode_times, concat_characters, decode_coords, drop_variables, use_cftime, decode_timedelta, group, mode, format, clobber, diskless, persist, lock, autoclose)
    624 def open_dataset(  # type: ignore[override]  # allow LSP violation, not supporting **kwargs
    625     self,
    626     filename_or_obj: str | os.PathLike[Any] | BufferedIOBase | AbstractDataStore,
   (...)
    642     autoclose=False,
    643 ) -> Dataset:
    644     filename_or_obj = _normalize_path(filename_or_obj)
--> 645     store = NetCDF4DataStore.open(
    646         filename_or_obj,
    647         mode=mode,
    648         format=format,
    649         group=group,
    650         clobber=clobber,
    651         diskless=diskless,
    652         persist=persist,
    653         lock=lock,
    654         autoclose=autoclose,
    655     )
    657     store_entrypoint = StoreBackendEntrypoint()
    658     with close_on_error(store):

File ~/Desktop/SIF_anomalies/venv/lib/python3.9/site-packages/xarray/backends/netCDF4_.py:408, in NetCDF4DataStore.open(cls, filename, mode, format, group, clobber, diskless, persist, lock, lock_maker, autoclose)
    402 kwargs = dict(
    403     clobber=clobber, diskless=diskless, persist=persist, format=format
    404 )
    405 manager = CachingFileManager(
    406     netCDF4.Dataset, filename, mode=mode, kwargs=kwargs
    407 )
--> 408 return cls(manager, group=group, mode=mode, lock=lock, autoclose=autoclose)

File ~/Desktop/SIF_anomalies/venv/lib/python3.9/site-packages/xarray/backends/netCDF4_.py:355, in NetCDF4DataStore.__init__(self, manager, group, mode, lock, autoclose)
    353 self._group = group
    354 self._mode = mode
--> 355 self.format = self.ds.data_model
    356 self._filename = self.ds.filepath()
    357 self.is_remote = is_remote_uri(self._filename)

File ~/Desktop/SIF_anomalies/venv/lib/python3.9/site-packages/xarray/backends/netCDF4_.py:417, in NetCDF4DataStore.ds(self)
    415 @property
    416 def ds(self):
--> 417     return self._acquire()

File ~/Desktop/SIF_anomalies/venv/lib/python3.9/site-packages/xarray/backends/netCDF4_.py:411, in NetCDF4DataStore._acquire(self, needs_lock)
    410 def _acquire(self, needs_lock=True):
--> 411     with self._manager.acquire_context(needs_lock) as root:
    412         ds = _nc4_require_group(root, self._group, self._mode)
    413     return ds

File /Library/Developer/CommandLineTools/Library/Frameworks/Python3.framework/Versions/3.9/lib/python3.9/contextlib.py:117, in _GeneratorContextManager.__enter__(self)
    115 del self.args, self.kwds, self.func
    116 try:
--> 117     return next(self.gen)
    118 except StopIteration:
    119     raise RuntimeError("generator didn't yield") from None

File ~/Desktop/SIF_anomalies/venv/lib/python3.9/site-packages/xarray/backends/file_manager.py:199, in CachingFileManager.acquire_context(self, needs_lock)
    196 @contextlib.contextmanager
    197 def acquire_context(self, needs_lock=True):
    198     """Context manager for acquiring a file."""
--> 199     file, cached = self._acquire_with_cache_info(needs_lock)
    200     try:
    201         yield file

File ~/Desktop/SIF_anomalies/venv/lib/python3.9/site-packages/xarray/backends/file_manager.py:217, in CachingFileManager._acquire_with_cache_info(self, needs_lock)
    215     kwargs = kwargs.copy()
    216     kwargs["mode"] = self._mode
--> 217 file = self._opener(*self._args, **kwargs)
    218 if self._mode == "w":
    219     # ensure file doesn't get overridden when opened again
    220     self._mode = "a"

File src/netCDF4/_netCDF4.pyx:2521, in netCDF4._netCDF4.Dataset.__init__()

File src/netCDF4/_netCDF4.pyx:2158, in netCDF4._netCDF4._ensure_nc_success()

FileNotFoundError: [Errno 2] No such file or directory: '/Users/carmen/Desktop/SIF_anomalies/spei01.nc'
In [ ]:
lat_min, lat_max = -15, 5
lon_min, lon_max = -75, -50


ds_2024 = ds.sel(time=slice("2024-01-01", "2024-12-01"))

ds_amazon_2024 = ds_2024.sel(
    lat=slice(lat_min, lat_max),
    lon=slice(lon_min, lon_max)
)

print(ds_amazon_2024)
<xarray.Dataset> Size: 24kB
Dimensions:  (lon: 25, lat: 20, time: 12)
Coordinates:
  * lon      (lon) float64 200B -74.5 -73.5 -72.5 -71.5 ... -52.5 -51.5 -50.5
  * lat      (lat) float64 160B -14.5 -13.5 -12.5 -11.5 ... 1.5 2.5 3.5 4.5
  * time     (time) datetime64[ns] 96B 2024-01-01 2024-02-01 ... 2024-12-01
Data variables:
    spei     (time, lat, lon) float32 24kB ...
In [ ]:
fig, axs = plt.subplots(3, 4, figsize=(18, 12), subplot_kw={'projection': ccrs.PlateCarree()})

for month_idx in range(12):
    ax = axs[month_idx // 4, month_idx % 4]
    ax.set_extent([lon_min, lon_max, lat_min, lat_max], crs=ccrs.PlateCarree())
    ax.add_feature(cfeature.LAND, facecolor="lightgray", edgecolor="black", alpha=0.3)
    ax.add_feature(cfeature.BORDERS, linewidth=0.5)
    ax.add_feature(cfeature.COASTLINE, linewidth=0.5)
    ax.add_feature(cfeature.RIVERS, alpha=0.5)
    im = ax.imshow(ds_amazon_2024['spei'].isel(time=month_idx), cmap='RdBu', vmin=-4.2, vmax=4.2, origin='lower', extent=[lon_min, lon_max, lat_min, lat_max], transform=ccrs.PlateCarree())
    ax.set_extent([lon_min, lon_max, lat_min, lat_max], crs=ccrs.PlateCarree())
    ax.set_title(f"{months[month_idx]} 2024")
    fig.colorbar(im, ax=ax, orientation='vertical', fraction=0.02, pad=0.04)

plt.show()


# Create a binary mask for values below -0.8
spei_month_anomaly = (ds_amazon_2024['spei'] < -0.8).astype(int)

fig, axs = plt.subplots(3, 4, figsize=(18, 12), subplot_kw={'projection': ccrs.PlateCarree()})

for month_idx in range(12):
    ax = axs[month_idx // 4, month_idx % 4]
    ax.set_extent([lon_min, lon_max, lat_min, lat_max], crs=ccrs.PlateCarree())
    ax.add_feature(cfeature.LAND, facecolor="lightgray", edgecolor="black", alpha=0.3)
    ax.add_feature(cfeature.BORDERS, linewidth=0.5)
    ax.add_feature(cfeature.COASTLINE, linewidth=0.5)
    ax.add_feature(cfeature.RIVERS, alpha=0.5)
    im = ax.imshow(spei_month_anomaly.isel(time=month_idx), cmap='Reds', origin='lower', extent=[lon_min, lon_max, lat_min, lat_max], transform=ccrs.PlateCarree())
    ax.set_title(f"{months[month_idx]} 2024")
    fig.colorbar(im, ax=ax, orientation='vertical', fraction=0.02, pad=0.04)

plt.tight_layout()
plt.show()


anomaly_spei_percentage = []
for month_idx in range(12):
    mask = spei_month_anomaly[month_idx]
    total = np.isfinite(mask).sum() 
    anomalies = np.nansum(mask)
    percent = (anomalies / total) * 100 if total > 0 else np.nan
    anomaly_spei_percentage.append(percent)


months_short = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
anomaly_spei_df = pd.DataFrame(np.array(anomaly_spei_percentage).T, columns=[2024], index=months_short)

print("Percentage of anomalies in 2024 (SPEI method):")
print(anomaly_spei_df.round(3))
No description has been provided for this image
No description has been provided for this image
Percentage of anomalies in 2024 (SPEI method):
     2024
Jan  55.0
Feb  56.2
Mar  65.6
Apr  51.4
May  42.4
Jun  73.2
Jul  76.4
Aug  88.2
Sep  96.6
Oct  74.2
Nov  45.0
Dec  43.6
In [ ]:
target_lat = np.linspace(lat_min, lat_max, 20)
target_lon = np.linspace(lon_min, lon_max, 25)

anomalies_cnn_2024_f = anomalies_da.sel(year=2024).astype(float)
print(anomalies_cnn_2024_f.shape)
cnn_anomalies_2024_resized = anomalies_cnn_2024_f.interp(latitude=target_lat, longitude=target_lon)
cnn_anomalies_2024_resized = cnn_anomalies_2024_resized = cnn_anomalies_2024_resized.round().astype(int).values

# #resize cnn_anomalies_2024 to (20,25)
# cnn_anomalies_2024_resized = np.zeros((12, 20, 25))
# for i in range(12):
#     cnn_anomalies_2024_resized[i] = resize(cnn_anomalies_2024[i], (20, 25))

#compute Dice and IoU for selected months of masked anomalies (CNN) vs Secas drought mask
selected_months = [4, 5, 7, 8, 9, 10, 11]
dice_scores = []
iou_scores = []

for m_idx in selected_months:
    intersection = np.sum(cnn_anomalies_2024_resized[m_idx] * spei_month_anomaly.isel(time=m_idx))
    total_pixels = np.sum(cnn_anomalies_2024_resized[m_idx]) + np.sum(spei_month_anomaly.isel(time=m_idx))
    dice = 1.0 if total_pixels == 0 else 2 * intersection / total_pixels
    dice_scores.append(dice)

    intersection_iou = np.sum(cnn_anomalies_2024_resized[m_idx] * spei_month_anomaly.isel(time=m_idx))
    union = np.sum((cnn_anomalies_2024_resized[m_idx] + spei_month_anomaly.isel(time=m_idx)) > 0)
    iou = 1.0 if union == 0 else intersection_iou / union
    iou_scores.append(iou)

for i, m_idx in enumerate(selected_months):
    print(f"{months[m_idx]}: Dice = {dice_scores[i]:.4f}, IoU = {iou_scores[i]:.4f}")
(12, 91, 113)
May: Dice = 0.2736, IoU = 0.1585
Jun: Dice = 0.3361, IoU = 0.2020
Aug: Dice = 0.4354, IoU = 0.2783
Sep: Dice = 0.5760, IoU = 0.4045
Oct: Dice = 0.4633, IoU = 0.3015
Nov: Dice = 0.3214, IoU = 0.1915
Dec: Dice = 0.3207, IoU = 0.1909
/Users/carmenoliver/.pyenv/versions/sif_env/lib/python3.11/site-packages/xarray/core/duck_array_ops.py:237: RuntimeWarning: invalid value encountered in cast
  return data.astype(dtype, **kwargs)
In [ ]:
# #resize cnn_anomalies_2024 to (20,25)
# zscore_anomalies_2024_resized2 = np.zeros((12, 20, 25))
# for i in range(12):
#     zscore_anomalies_2024_resized2[i] = resize(zscore_anomalies_2024_resized[m_idx]
# [i], (20, 25))

anomaly_2024_f = anomaly_2024.astype(float)

target_lat = np.linspace(lat_min, lat_max, 20)
target_lon = np.linspace(lon_min, lon_max, 25)

zscore_anomalies_2024_resized2 = anomaly_2024_f.interp(
    latitude=target_lat,
    longitude=target_lon)

zscore_anomalies_2024_resized2 = (zscore_anomalies_2024_resized2 >= 0.5).astype(int).values

#compute Dice and IoU for selected months of masked anomalies (CNN) vs Secas drought mask
selected_months = [4, 5, 7, 8, 9, 10, 11]
dice_scores = []
iou_scores = []

for m_idx in selected_months:
    intersection = np.sum(zscore_anomalies_2024_resized2[m_idx] * spei_month_anomaly.isel(time=m_idx))
    total_pixels = np.sum(zscore_anomalies_2024_resized2[m_idx]) + np.sum(spei_month_anomaly.isel(time=m_idx))
    dice = 1.0 if total_pixels == 0 else 2 * intersection / total_pixels
    dice_scores.append(dice)

    intersection_iou = np.sum(zscore_anomalies_2024_resized2[m_idx] *spei_month_anomaly.isel(time=m_idx))
    union = np.sum((zscore_anomalies_2024_resized2[m_idx] + spei_month_anomaly.isel(time=m_idx)) > 0)
    iou = 1.0 if union == 0 else intersection_iou / union
    iou_scores.append(iou)

for i, m_idx in enumerate(selected_months):
    print(f"{months[m_idx]}: Dice = {dice_scores[i]:.4f}, IoU = {iou_scores[i]:.4f}")
May: Dice = 0.0088, IoU = 0.0044
Jun: Dice = 0.0322, IoU = 0.0163
Aug: Dice = 0.2554, IoU = 0.1464
Sep: Dice = 0.4867, IoU = 0.3216
Oct: Dice = 0.0521, IoU = 0.0267
Nov: Dice = 0.0431, IoU = 0.0220
Dec: Dice = 0.0181, IoU = 0.0091
In [ ]:
# Clip z_scores to a range and take absolute values
z_scores_clipped = z_scores.pipe(abs)

# Take absolute values of SPEI
spei_abs = ds_amazon_2024['spei'].pipe(abs)
z_scores_resized = z_scores_clipped.interp(latitude=target_lat, longitude=target_lon)
reconstruction_error_pixel_2024_resized = reconstruction_error_pixel_all.sel(year=2024).interp(latitude=target_lat, longitude=target_lon).clip(max=1)
print(z_scores_resized.shape, cnn_anomalies_2024_resized.shape, ds_amazon_2024['spei'].shape)
month_idx = 0

plt.scatter(spei_abs, z_scores_resized, alpha=0.04)
plt.xlabel("SPEI")
plt.ylabel("Z-scores")
plt.title("SPEI vs Z-scores (Sept 2024)")
plt.show()

plt.scatter(spei_abs, reconstruction_error_pixel_2024_resized, alpha=0.04)
plt.xlabel("SPEI")
plt.ylabel("Reconstruction Error")
plt.title("SPEI vs Reconstruction Error (Sept 2024)")
plt.show()

plt.scatter(reconstruction_error_pixel_2024_resized, 
            z_scores_resized, alpha=0.04)
plt.ylabel("Z-scores")
plt.xlabel("Reconstruction Error")
plt.title("Z-scores vs Reconstruction Error (Sept 2024)")
plt.show()
(12, 20, 25) (12, 20, 25) (12, 20, 25)
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
In [ ]:
# Flatten the data for correlation computation
spei_flat = spei_abs.values.flatten()
z_scores_flat = z_scores_resized.values.flatten()
reconstruction_error_flat = reconstruction_error_pixel_2024_resized.values.flatten()

# Remove NaN values for valid correlation computation
valid_mask = ~np.isnan(spei_flat) & ~np.isnan(z_scores_flat) & ~np.isnan(reconstruction_error_flat)
spei_flat = spei_flat[valid_mask]
z_scores_flat = z_scores_flat[valid_mask]
reconstruction_error_flat = reconstruction_error_flat[valid_mask]

# Compute correlations
corr_spei_z_scores = np.corrcoef(spei_flat, z_scores_flat)[0, 1]
corr_spei_reconstruction_error = np.corrcoef(spei_flat, reconstruction_error_flat)[0, 1]
corr_z_scores_reconstruction_error = np.corrcoef(z_scores_flat, reconstruction_error_flat)[0, 1]

# Print the results
print(f"Correlation between SPEI and Z-scores: {corr_spei_z_scores:.3f}")
print(f"Correlation between SPEI and Reconstruction Error: {corr_spei_reconstruction_error:.3f}")
print(f"Correlation between Z-scores and Reconstruction Error: {corr_z_scores_reconstruction_error:.3f}")
Correlation between SPEI and Z-scores: 0.137
Correlation between SPEI and Reconstruction Error: 0.044
Correlation between Z-scores and Reconstruction Error: 0.247
In [ ]:
anomaly_percentages_2024
anomaly_percentages_z
# Flatten the anomaly percentages for 2024
anomaly_percentages_2024_flat = np.array(anomaly_percentages_2024)
anomaly_percentages_z_flat = np.array(anomaly_percentages_z[0])

# Compute correlation
correlation = np.corrcoef(anomaly_percentages_2024_flat, anomaly_percentages_z_flat)[0, 1]
print(f"Correlation between CAE and Z-score anomaly percentages: {correlation:.3f}")

# Scatter plot
plt.figure(figsize=(8, 6))
plt.scatter(anomaly_percentages_2024_flat, anomaly_percentages_z_flat, color='blue', alpha=0.7)
plt.xlabel("CAE Anomaly Percentage (%)")
plt.ylabel("Z-score Anomaly Percentage (%)")
plt.title("Correlation between CAE and Z-score Anomaly Percentages")
plt.grid(True)
plt.tight_layout()
plt.show()
Correlation between CAE and Z-score anomaly percentages: 0.661
No description has been provided for this image